title: “Geography 176A” author: “Van Jackson” subtitle: ‘Lab 04: Tessalations, Point in Polygon’ output: html_document: theme: journal
#Libraries
# SPDS
library(tidyverse) # data wrangling
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl) # import xlsx data
library(sf) # Working with vector data
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(rmapshaper)# Simplify geometries
## Registered S3 method overwritten by 'geojsonlint':
## method from
## print.location dplyr
library(units) # manage your units
## udunits system database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits
#Data
library(USAboundaries) # county boundaries
#Visualization
library(gghighlight) # ggplot conditional highlighting
library(knitr) # table generation
library(kableExtra) # making tables pretty
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
#Question 1
#1.1
CONUS = USAboundaries::us_counties(resolution = "low") %>%
filter(state_name != "Hawaii") %>%
filter(state_name != "Alaska") %>%
filter(state_name != "Puerto Rico") %>%
st_transform(5070)
CONUS_u = st_union(CONUS)
CONUS_simp = ms_simplify(CONUS_u, keep = .05)
CONUSpnts = mapview:: npts(CONUS)
simppts = mapview::npts(CONUS_simp)
#1.2
county_centroid = st_centroid(CONUS) %>%
st_combine()
## Warning in st_centroid.sf(CONUS): st_centroid assumes attributes are constant
## over geometries of x
#1.3 -1.5 Tessalations
#Voronoi
v_grid = st_voronoi(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Triangulated
t_grid = st_triangulate(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Grid Coverage
sq_grid = st_make_grid(CONUS_simp, n = c(70, 50)) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Hexagonal Coverage
hex_grid = st_make_grid(CONUS_simp, n = c(50,70), square = FALSE) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#1.6 Plot
plot_tess = function(data, title)
{ggplot() +
geom_sf(data = data, fill = "white", col = "navy", size = .2) +
theme_void() +
labs(title = title, caption = paste("This tesselation contains:", nrow(data), "tiles")) +
theme(plot.title = element_text(hjust = .5, color = "black", face = "bold"))}
#Original
plot_tess(data = CONUS_simp, "Original County Data")
#Voronoi
v_grid = st_intersection(v_grid, st_union(CONUS_simp))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(v_grid, "Voronoi") +
geom_sf(data = county_centroid, col = "red", size = 0.2)
#Triangulated
t_grid = st_intersection(t_grid, st_union(CONUS_simp))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(t_grid, "Triangulated") +
geom_sf(data = county_centroid, col = "red", size = 0.2)
#Gridded
plot_tess(sq_grid, "Square Coverage")
#Hexagonal
plot_tess(hex_grid, "Hexogonal Coverage")
#Question 2
#2.1
sf_to_df = function(sf_object, description) {
area_sf= st_area(sf_object) %>%
set_units('km^2') %>%
drop_units()
area_df = data.frame(tesselation = description, features = length(area_sf), mean_area = mean(area_sf), std = sd(area_sf), total_area = sum(area_sf))
return(area_df)
}
description= "test"
#2.2
voroni_df = sf_to_df(v_grid, "voroni tesselation")
tri_df = sf_to_df(t_grid, "triangulation tesselation")
sq_df = sf_to_df(sq_grid, "square grid tesselation")
hex_df = sf_to_df(hex_grid, "hexagonal tesselation")
counties_df = sf_to_df(CONUS, "county")
#2.3
tess_summary = bind_rows(
sf_to_df(t_grid, "triangulation"),
sf_to_df(v_grid, "voroni"),
sf_to_df(sq_grid, "square grid"),
sf_to_df(hex_grid, "hexagonal grid"),
sf_to_df(sf_object= CONUS, "county"))
#2.4
?mean()
knitr::kable(tess_summary, caption = "US county tesselations", col.names = c("Tesselations", "Number of features", "Mean Area", "Standard Deviation", "Total area"))
| Tesselations | Number of features | Mean Area | Standard Deviation | Total area |
|---|---|---|---|---|
| triangulation | 6195 | 1253.691 | 1578.647 | 7766614 |
| voroni | 3106 | 2525.425 | 2886.661 | 7843970 |
| square grid | 2256 | 3761.443 | 0.000 | 8485816 |
| hexagonal grid | 1175 | 7368.799 | 0.000 | 8658339 |
| county | 3108 | 2521.745 | 3404.325 | 7837583 |
#2.5 Comment - As you move from triangulation at the top of the chart to the original county data atthe bottom, the area moves from smallest to largest. Each individual tesselation returns a specific part of the US and counties. The point in polygon will be affected by the different areas returned by each of the tesselations.
#Question 3
#3.1
NID2019_U = read_excel("/users/owner1/github/geog-176A-labs/data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070)
#3.2
point_in_polygon = function(points, polygon, group){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get(group)) %>%
setNames(c(group, "n")) %>%
left_join(polygon, by = group) %>%
st_as_sf()
}
#3.3
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
#3.4
plot_pip = function(data, title){
ggplot() +
geom_sf(data = data, aes(fill = n), col = NA, alpha = .9, size= .2) +
scale_fill_gradient(low = "white", high = "blue") +
theme_void() +
labs(title = "dams in the US", caption = paste0(sum(data$n), "dams represented")) +
theme(legend.position = "none", plot.title = element_text(face = "bold", color = "blue", hjust = .5, size = 24))
}
plot_pip(voroni_pip, "Voroni Point in Polygon")
plot_pip(county_pip, "Raw County Data Point in Polygon")
#3.5
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
plot_pip(voroni_pip)
plot_pip(triangulation_pip)
plot_pip(grid_pip)
plot_pip(hexagonal_pip)
plot_pip(county_pip)
The hex and square tesselations provided a different result than the others. This is because the others were plotted by county and the hex and squares were plotted under equal ratios of area. Moving forward I am only going to use the hexagonal tesselation. It was either that or square tesselation becasue of the fact that it takes into account the area and size rather than just the county.
#Question 4
#4.1
fireprot_dams = point_in_polygon(filter(NID2019_U, grepl("P", NID2019_U$PURPOSES)), hex_grid, "id")
watersup_dams = point_in_polygon(filter(NID2019_U, grepl("S", NID2019_U$PURPOSES)), hex_grid, "id")
hydroelec_dams = point_in_polygon(filter(NID2019_U, grepl("H", NID2019_U$PURPOSES)), hex_grid, "id")
irrig_dams = point_in_polygon(filter(NID2019_U, grepl("I", NID2019_U$PURPOSES)), hex_grid, "id")
#4.2
plot_pip2 = function(data, text){
ggplot() +
geom_sf(data = data, aes(fill = n), alpha = .9, size = .2) +
gghighlight(n > mean(n) +sd(n)) +
viridis::scale_fill_viridis() +
theme_void() +
theme(plot.title = element_text(face = "bold", color = "black", size = 24), plot.subtitle = element_text(size = 12),
plot.caption = element_text(face = "bold", size = 12), legend.title = element_text(face = "bold"),
legend.text = element_text(face = "bold")) +
labs(title = text,
subtitle = "Data Source: National Inventory of Dams",
fill = "Number of dams",
caption = paste0(sum(data$n), "dams")) +
theme(aspect.ratio = .5)
}
#4.3
plot_pip2(fireprot_dams, "Fire Protection Dams")
plot_pip2(watersup_dams, "Water Suppy Dams")
plot_pip2(hydroelec_dams, "Hydroelectric Dams")
plot_pip2(irrig_dams, "Irrigation Dams")
#Question 4 Comments Fire protection dams are mainly central US. Possibly due to the drier climates which allow for fires to start easier. Water supply dams are scattered throughout the east, central, and west US; located on bigger water supplys.Hydroelectric dams are also scattered but also seem to be most dense in the Northeastern US. Irrigation dams are found more on the west US; this oculd be due to the fact that the western US does not receive nearly as much rainfall than Eastern US